/*
Summary statistics for the entrepreneur sample. Results underlying Table 1.
*/

* start with nothing in memory and work relative to the project root
clear
cd "$root"

* run the project ado file that holds the stacking / weighting code
* (expected to define stack_weight_sample, which is called further down)
do "$root/code/admin/ado/stack_weight_panel.ado"

* directory that receives the exported summary-statistics table
global figpath "$root/estimates/admin/regression"

** step 1: build the unstacked owner-firm-year data
use data/admin/intermediate/penalty_cohort_final.dta, clear

* education indicators: business field (edbfield code 4) and college degree
* (edlevel code 7)
* NOTE(review): a missing edbfield / edlevel is coded 0 here rather than
* missing -- confirm that this is intended
generate business_ed = edbfield == 4
generate college_degree = edlevel == 7

* marital status is taken from the following calendar year: shift year forward
* by one, merge on (lnr, year), then shift year back. Unmatched rows from the
* marital file are not kept (keep(1 3)).
replace year = year + 1
merge m:1 lnr year using data/admin/raw/marital_cohabit_91_20.dta, ///
    keep(1 3) nogen
generate married_cohabit = inlist(marital, 0, 2) if marital != .
replace year = year - 1

* indicator: the first child was born strictly before the current year
generate any_child = (year_first_child != . & year_first_child < year)

* outcome variables handed to the stacking program and summarised below
global outcomes_raw ///
    operating_profits profit_before_self revenues costs_before_self ///
    business_ed college_degree any_child married_cohabit ///
    n_emp_excl_owners owner_salaries

* keep only identifiers, cohort information, owner / firm characteristics and
* the outcomes
keep lnr lfirm year cohort_id year_first_child ///
    male age currently_own edlevel ///
    firm_byear firm_age ind_cat* $outcomes_raw

* firm age used for matching, top-coded at 20
* NOTE(review): Stata treats missing as larger than any number, so a missing
* firm_age also receives the value 20 here -- confirm that this is intended
generate firm_age_match = cond(firm_age > 20, 20, firm_age)
    
* stack the data across cohorts and build the control-firm weights with the
* project program stack_weight_sample (defined outside this file); cohorts
* 2002-2018, event window -5 to 10, firm age taken from firm_age_match
stack_weight_sample ind_cat2, agevar(firm_age_match) ///
    min_event(-5) max_event(10) ///
    first_cohort(2002) last_cohort(2018) ///
    outcomes($outcomes_raw)

* one 0/1 indicator per firm industry, based on the string labels in ind_cat
* (creation order is kept as is: the industry_* wildcard used in the collapse
* commands below expands in dataset order)
generate industry_trade          = (ind_cat == "Wholesale and retail trade")
generate industry_accommodation  = (ind_cat == "Accommodation and food service")
generate industry_other_service  = (ind_cat == "Other service")
generate industry_professional   = (ind_cat == "Professional, scientific, and technical")
generate industry_real_estate    = (ind_cat == "Real estate")
generate industry_administrative = (ind_cat == "Administrative and support service")
generate industry_arts           = (ind_cat == "Arts, entertainment, and recreation")
generate industry_construction   = (ind_cat == "Construction")
generate industry_education      = (ind_cat == "Education")
generate industry_health         = (ind_cat == "Human health")
generate industry_info           = (ind_cat == "Information and communication")
generate industry_manufac        = (ind_cat == "Manufacturing")
generate industry_transport      = (ind_cat == "Transportation and storage")

** panel A: owner and firm characteristics, unweighted
* education, marital status, age, children, firm age and industry
* sample: treated rows in the year before the cohort year
* (year == cohort_id - 1) and every control row

* temporary files shared by the sections of this script
tempfile unwtd
tempfile wtd
tempfile counts_unwtd

* unweighted, individual-level
preserve
* apply the sample restriction BEFORE reducing to one row per owner-year.
* De-duplicating first would keep an arbitrary row per (lnr, year) -- the order
* of ties under -bys- is randomised -- and that row could then fail the
* restriction or belong to the other treatment group whenever an owner-year
* appears in more than one stack.
keep if (year == cohort_id - 1 & treated == 1) | (treated == 0)
* one row per owner-year within treatment group (each owner-year counts once)
cap drop flag_lnr
bys treated lnr year : gen flag_lnr = (_n == 1)
keep if flag_lnr == 1
collapse (mean) married_cohabit business_ed college_degree age any_child, by(treated male)
save `unwtd', replace
restore

* unweighted, firm characteristics
preserve
* same sample restriction as above
keep if (year == cohort_id - 1 & treated == 1) | (treated == 0)
* weight by owner: the rows of one owner-year share a total weight of 1
gen ones = 1
bys lnr year : egen owner_nfirms = sum(ones)
gen owner_wt = 1 / owner_nfirms
* firm ages of 100 or more are not used in the mean
replace firm_age = . if firm_age >= 100
collapse (mean) firm_age industry_* [aweight=owner_wt], by(treated male)
* every (treated, male) cell must exist in both pieces
merge 1:1 treated male using `unwtd', assert(3) nogen
save `unwtd', replace
restore

* firm balance sheet items (means, medians, standard deviations)
* unweighted
preserve
* treated rows: the year before the cohort year; control rows: all years
keep if treated == 0 | (treated == 1 & year == cohort_id - 1)
* for consistency in the table (can't parse out owner salaries after 2018)
* NOTE(review): the line below blanks revenues, not owner_salaries -- confirm
* that this is the intended variable
replace revenues = . if year > 2018

* weight by owner: the rows of one owner-year share a total weight of 1
bys lnr year : gen owner_nfirms = _N
gen owner_wt = 1 / owner_nfirms

* build the collapse specification: for each statistic, one output column per
* balance-sheet item, named <item>_<mean|med|sd>
local bs_items   operating_profits  revenues costs             owner_sal      emp_excl_owner
local bs_sources profit_before_self revenues costs_before_self owner_salaries n_emp_excl_owners
local bs_spec ""
foreach stat in mean median sd {
    local sfx = cond("`stat'" == "median", "med", "`stat'")
    local bs_spec "`bs_spec' (`stat')"
    forvalues j = 1/5 {
        local item : word `j' of `bs_items'
        local src  : word `j' of `bs_sources'
        local bs_spec "`bs_spec' `item'_`sfx' = `src'"
    }
}
collapse `bs_spec' [aweight=owner_wt], by(treated male)

* attach the unweighted owner / firm characteristics; every (treated, male)
* cell must be present on both sides
merge 1:1 treated male using `unwtd', assert(3) nogen
gen weighted = 0
save `unwtd', replace
restore

* weighted, firm + individual characteristics
* all rows are restricted to the year before the cohort year and weighted by
* X_wt (not created in this file; presumably produced by stack_weight_sample)
preserve
keep if year == cohort_id - 1
* firm ages of 100 or more are not used in the mean
replace firm_age = . if firm_age >= 100
local char_vars married_cohabit business_ed college_degree age any_child ///
    firm_age industry_*
collapse (mean) `char_vars' [aweight=X_wt], by(treated male)
save `wtd', replace
restore

* firm balance sheet items (means, medians, standard deviations)
* weighted
preserve
* same restriction: the year before the cohort year
keep if year == cohort_id - 1

* build the collapse specification: for each statistic, one output column per
* balance-sheet item, named <item>_<mean|med|sd>
local bs_items   operating_profits  revenues costs             owner_sal      emp_excl_owner
local bs_sources profit_before_self revenues costs_before_self owner_salaries n_emp_excl_owners
local bs_spec ""
foreach stat in mean median sd {
    local sfx = cond("`stat'" == "median", "med", "`stat'")
    local bs_spec "`bs_spec' (`stat')"
    forvalues j = 1/5 {
        local item : word `j' of `bs_items'
        local src  : word `j' of `bs_sources'
        local bs_spec "`bs_spec' `item'_`sfx' = `src'"
    }
}
collapse `bs_spec' [aweight=X_wt], by(treated male)
gen weighted = 1

* add the weighted characteristics (all cells must match), then stack the
* unweighted rows underneath
merge 1:1 treated male using `wtd', assert(3) nogen
append using `unwtd'
save `wtd', replace
restore

* get number of firms and owners
* Units are tagged once WITHIN each (treated, male) cell, because the counts
* are summed by (treated, male). Tagging once over the whole sample would
* credit a unit that appears in more than one cell (e.g. a firm with both a
* male and a female owner) to whichever cell its arbitrarily chosen row falls
* in -- the order of ties under -bys- is randomised.

* unweighted counts, year before the cohort year
preserve
keep if year == cohort_id - 1
bys treated male lnr : gen lnr_flag = (_n == 1) // one per owner and cell, can sum
bys treated male lfirm : gen lfirm_flag = (_n == 1) // one per firm and cell, can sum

collapse (sum) n_firms = lfirm_flag ///
               n_owners = lnr_flag, ///
	 by(treated male)
gen weighted = 0
save `counts_unwtd', replace
restore

* weighted counts: units with at least one strictly positive weight
preserve
keep if year == cohort_id - 1
* missing compares as larger than any number in Stata, so missing weights are
* excluded explicitly
gen byte has_wt = (X_wt > 0 & !missing(X_wt))
* tag one positive-weight row per unit and cell; tagging before looking at the
* weight would lose units whose tagged row happens to have zero weight
bys treated male lnr has_wt : gen lnr_flag_wt = (_n == 1) & has_wt
bys treated male lfirm has_wt : gen lfirm_flag_wt = (_n == 1) & has_wt

collapse (sum) n_firms = lfirm_flag_wt ///
               n_owners = lnr_flag_wt, ///
	 by(treated male)
gen weighted = 1
append using `counts_unwtd'
* attach the summary statistics; every (treated, male, weighted) cell must match
merge 1:1 treated male weighted using `wtd', assert(3) nogen

* write the final table (path quoted so a root directory with spaces works)
export delimited using "${figpath}/penalty_summary_stats.csv", replace
restore
